import pickle
from gensim import corpora, models
import os
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import sys
from util import JS_div

# Run configuration. `topic_number` is not referenced in the visible part of
# this script; the two folders are where the per-sample inputs
# (root_folder/<k>/) are read from and where results are written.
topic_number = 26
result_folder = '../data/results/new_topic_number_26/'
root_folder = '../data/tmp_files/new_topic_number_26/boosting_files/all_four/'
tmp_folder = root_folder + '0/'

# Load sample 0 up front; the lengths of these matrices are what the script
# uses to size the stacked distance tensors.
# NOTE(review): pickle.load executes arbitrary code embedded in the file --
# only point these paths at trusted, locally produced data.
# Each file is opened in a `with` block so the handle is closed right away
# instead of being left to the garbage collector.
with open(tmp_folder + 'congress_topic_model_matrix_4th_sample_current_only_0', 'rb') as fh:
    congress_matrix = pickle.load(fh)
with open(tmp_folder + 'congress_name_list_4th_sample_current_only_0', 'rb') as fh:
    congress_name_list = pickle.load(fh)
with open(tmp_folder + 'mayor_topic_model_matrix_4th_sample_current_only_0', 'rb') as fh:
    mayor_matrix = pickle.load(fh)
with open(tmp_folder + 'mayor_name_list_4th_sample_current_only_0', 'rb') as fh:
    mayor_name_list = pickle.load(fh)
with open(tmp_folder + 'governor_topic_model_matrix_4th_sample_current_only_0', 'rb') as fh:
    governor_matrix = pickle.load(fh)
with open(tmp_folder + 'governor_name_list_4th_sample_current_only_0', 'rb') as fh:
    governor_name_list = pickle.load(fh)

# Number of samples to process; sample k lives in root_folder/<k>/.
# Kept in one place so the tensor depth and the loop bound cannot drift apart.
num_samples = 100

# One distance matrix per sample, stacked along axis 0. Sizes come from the
# matrices already loaded for sample 0.
# NOTE(review): this assumes every sample has the same number of rows as
# sample 0; a larger sample would raise IndexError below -- confirm upstream.
congress_dist_matrix = np.zeros((num_samples, len(congress_matrix), len(congress_matrix)))
mayor_dist_matrix = np.zeros((num_samples, len(mayor_matrix), len(mayor_matrix)))
governor_dist_matrix = np.zeros((num_samples, len(governor_matrix), len(governor_matrix)))
congress_x_mayor_dist_matrix = np.zeros((num_samples, len(congress_matrix), len(mayor_matrix)))
congress_x_governor_dist_matrix = np.zeros((num_samples, len(congress_matrix), len(governor_matrix)))

for k in range(num_samples):
    print(k)  # progress indicator
    tmp_folder = root_folder + str(k) + '/'

    # Load this sample's matrices and name lists; `with` closes each handle
    # immediately instead of leaking six descriptors per iteration.
    with open(tmp_folder + 'congress_topic_model_matrix_4th_sample_current_only_' + str(k), 'rb') as fh:
        congress_matrix = pickle.load(fh)
    with open(tmp_folder + 'congress_name_list_4th_sample_current_only_' + str(k), 'rb') as fh:
        congress_name_list = pickle.load(fh)
    with open(tmp_folder + 'mayor_topic_model_matrix_4th_sample_current_only_' + str(k), 'rb') as fh:
        mayor_matrix = pickle.load(fh)
    with open(tmp_folder + 'mayor_name_list_4th_sample_current_only_' + str(k), 'rb') as fh:
        mayor_name_list = pickle.load(fh)
    with open(tmp_folder + 'governor_topic_model_matrix_4th_sample_current_only_' + str(k), 'rb') as fh:
        governor_matrix = pickle.load(fh)
    with open(tmp_folder + 'governor_name_list_4th_sample_current_only_' + str(k), 'rb') as fh:
        governor_name_list = pickle.load(fh)

    n_congress = len(congress_matrix)
    n_mayor = len(mayor_matrix)
    n_governor = len(governor_matrix)

    # Within-group distances: only the strict upper triangle (j > i) is
    # filled; the diagonal and lower triangle stay at zero.
    # np.array(...) is applied per call, as before, so JS_div always receives
    # fresh arrays.
    for i in range(n_congress - 1):
        for j in range(i + 1, n_congress):
            congress_dist_matrix[k][i][j] = JS_div(np.array(congress_matrix[i]), np.array(congress_matrix[j]))

    for i in range(n_mayor - 1):
        for j in range(i + 1, n_mayor):
            mayor_dist_matrix[k][i][j] = JS_div(np.array(mayor_matrix[i]), np.array(mayor_matrix[j]))

    for i in range(n_governor - 1):
        for j in range(i + 1, n_governor):
            governor_dist_matrix[k][i][j] = JS_div(np.array(governor_matrix[i]), np.array(governor_matrix[j]))

    # Cross-group distances: every congress row against every mayor /
    # governor row, so the full rectangle is filled.
    for i in range(n_congress):
        for j in range(n_mayor):
            congress_x_mayor_dist_matrix[k][i][j] = JS_div(np.array(congress_matrix[i]), np.array(mayor_matrix[j]))

    for i in range(n_congress):
        for j in range(n_governor):
            congress_x_governor_dist_matrix[k][i][j] = JS_div(np.array(congress_matrix[i]), np.array(governor_matrix[j]))


# Persist the stacked (per-sample) distance tensors under root_folder.
# Each output file is written inside a `with` block so it is flushed and
# closed deterministically rather than whenever the handle is collected.
with open(root_folder + 'congress_dist_matrix_4th_no_timelimit_all', 'wb') as fh:
    pickle.dump(congress_dist_matrix, fh)
with open(root_folder + 'mayor_dist_matrix_4th_no_timelimit_all', 'wb') as fh:
    pickle.dump(mayor_dist_matrix, fh)
with open(root_folder + 'governor_dist_matrix_4th_no_timelimit_all', 'wb') as fh:
    pickle.dump(governor_dist_matrix, fh)
with open(root_folder + 'congress_x_mayor_dist_matrix_4th_no_timelimit_all', 'wb') as fh:
    pickle.dump(congress_x_mayor_dist_matrix, fh)
with open(root_folder + 'congress_x_governor_dist_matrix_4th_no_timelimit_all', 'wb') as fh:
    pickle.dump(congress_x_governor_dist_matrix, fh)

# Average each stacked tensor over its leading (per-sample) axis, leaving one
# 2-D matrix of mean distances per comparison.
(
    congress_dist_matrix_avg,
    mayor_dist_matrix_avg,
    governor_dist_matrix_avg,
    congress_x_mayor_dist_matrix_avg,
    congress_x_governor_dist_matrix_avg,
) = (
    np.mean(stacked, axis=0)
    for stacked in (
        congress_dist_matrix,
        mayor_dist_matrix,
        governor_dist_matrix,
        congress_x_mayor_dist_matrix,
        congress_x_governor_dist_matrix,
    )
)

# Sanity output: shape before and after averaging for the congress tensor.
print(congress_dist_matrix.shape)
print(congress_dist_matrix_avg.shape)

def _dump_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file even if dumping fails."""
    with open(path, 'wb') as fh:
        pickle.dump(obj, fh)


def _write_distance_csv(path, row_names, col_names, dist, n_rows, n_cols, upper_only=False):
    """Write a labelled distance table to *path* as comma-separated text.

    The first line is an empty cell followed by *col_names*. Each of the
    first *n_rows* rows starts with its name from *row_names*, followed by
    *n_cols* cells taken from ``dist[i][j]``.

    When *upper_only* is true, only cells with ``j > i`` are filled; the
    remaining cells are written empty (their separators are still emitted so
    columns stay aligned).

    Names are written verbatim, without CSV quoting.
    """
    with open(path, 'w') as out:
        out.write(',' + ','.join(col_names) + '\n')
        for i in range(n_rows):
            cells = ''.join(
                ',' + (str(dist[i][j]) if (not upper_only or j > i) else '')
                for j in range(n_cols)
            )
            out.write(row_names[i] + cells + '\n')


# Row/column counts come from the most recently loaded matrices, and the
# labels from the most recently loaded name lists.
n_congress = len(congress_matrix)
n_mayor = len(mayor_matrix)
n_governor = len(governor_matrix)

# Within-group tables: strict upper triangle only, so the last row (which
# would have no filled cell) is not written.
_write_distance_csv(result_folder + 'congress_vs_congress_topic_space_4th_no_timelimit.csv',
                    congress_name_list, congress_name_list, congress_dist_matrix_avg,
                    n_congress - 1, n_congress, upper_only=True)
_dump_pickle(congress_dist_matrix_avg, root_folder + 'congress_dist_matrix_4th_no_timelimit_avg')

_write_distance_csv(result_folder + 'mayor_vs_mayor_topic_space_4th_no_timelimit.csv',
                    mayor_name_list, mayor_name_list, mayor_dist_matrix_avg,
                    n_mayor - 1, n_mayor, upper_only=True)
_dump_pickle(mayor_dist_matrix_avg, root_folder + 'mayor_dist_matrix_4th_no_timelimit_avg')

_write_distance_csv(result_folder + 'governor_vs_governor_topic_space_4th_no_timelimit.csv',
                    governor_name_list, governor_name_list, governor_dist_matrix_avg,
                    n_governor - 1, n_governor, upper_only=True)
_dump_pickle(governor_dist_matrix_avg, root_folder + 'governor_dist_matrix_4th_no_timelimit_avg')

# Cross-group tables: congress rows against mayor / governor columns, with
# every cell filled.
_write_distance_csv(result_folder + 'congress_vs_mayor_topic_space_4th_no_timelimit.csv',
                    congress_name_list, mayor_name_list, congress_x_mayor_dist_matrix_avg,
                    n_congress, n_mayor)
_dump_pickle(congress_x_mayor_dist_matrix_avg, root_folder + 'congress_x_mayor_dist_matrix_4th_no_timelimit_avg')

_write_distance_csv(result_folder + 'congress_vs_governor_topic_space_4th_no_timelimit.csv',
                    congress_name_list, governor_name_list, congress_x_governor_dist_matrix_avg,
                    n_congress, n_governor)
_dump_pickle(congress_x_governor_dist_matrix_avg, root_folder + 'congress_x_governor_dist_matrix_4th_no_timelimit_avg')


# Reload the averaged matrices from disk. From here on the *_dist_matrix
# names are rebound to whatever the *_avg pickles contain, replacing any
# earlier value held under the same names.
# NOTE(review): pickle.load executes arbitrary code embedded in the file --
# only read files this script (or another trusted job) produced.
# `with` closes each handle immediately instead of leaking it.
with open(root_folder + 'congress_dist_matrix_4th_no_timelimit_avg', 'rb') as fh:
    congress_dist_matrix = pickle.load(fh)
with open(root_folder + 'mayor_dist_matrix_4th_no_timelimit_avg', 'rb') as fh:
    mayor_dist_matrix = pickle.load(fh)
with open(root_folder + 'congress_x_mayor_dist_matrix_4th_no_timelimit_avg', 'rb') as fh:
    congress_x_mayor_dist_matrix = pickle.load(fh)
with open(root_folder + 'governor_dist_matrix_4th_no_timelimit_avg', 'rb') as fh:
    governor_dist_matrix = pickle.load(fh)
with open(root_folder + 'congress_x_governor_dist_matrix_4th_no_timelimit_avg', 'rb') as fh:
    congress_x_governor_dist_matrix = pickle.load(fh)

# Flatten each distance matrix into a plain list and take its median.
n_congress_rows = len(congress_dist_matrix)
n_mayor_rows = len(mayor_dist_matrix)
n_governor_rows = len(governor_dist_matrix)

# Within-group: strict upper triangle only (each unordered pair once),
# visited row by row.
all_dis_congress = [
    congress_dist_matrix[row][col]
    for row in range(n_congress_rows - 1)
    for col in range(row + 1, n_congress_rows)
]
median_congress = np.median(all_dis_congress)

all_dis_mayor = [
    mayor_dist_matrix[row][col]
    for row in range(n_mayor_rows - 1)
    for col in range(row + 1, n_mayor_rows)
]
median_mayor = np.median(all_dis_mayor)

all_dis_governor = [
    governor_dist_matrix[row][col]
    for row in range(n_governor_rows - 1)
    for col in range(row + 1, n_governor_rows)
]
median_governor = np.median(all_dis_governor)

# Cross-group: every (congress row, mayor/governor column) cell, row by row.
all_dis_congress_mayor = [
    congress_x_mayor_dist_matrix[row][col]
    for row in range(n_congress_rows)
    for col in range(n_mayor_rows)
]
median_mayor_congress = np.median(all_dis_congress_mayor)

all_dis_congress_governor = [
    congress_x_governor_dist_matrix[row][col]
    for row in range(n_congress_rows)
    for col in range(n_governor_rows)
]
median_governor_congress = np.median(all_dis_congress_governor)


# Overlay the five distance distributions in one figure and save it as PDF.
plt.figure()

# One entry per histogram, in drawing order:
# (distances, fill colour, edge colour, legend template, median for legend).
histogram_specs = (
    (all_dis_congress, 'lightskyblue', 'b',
     'Cong. vs. Cong. (median=%.3f)', median_congress),
    (all_dis_mayor, 'tomato', 'r',
     'Mayor vs. Mayor (median=%.3f)', median_mayor),
    (all_dis_congress_governor, 'pink', 'magenta',
     'Cong. vs. Governor (median=%.3f)', median_governor_congress),
    (all_dis_governor, 'khaki', 'y',
     'Governor vs. Governor (median=%.3f)', median_governor),
    (all_dis_congress_mayor, 'mediumseagreen', 'g',
     'Cong. vs. Mayor (median=%.3f)', median_mayor_congress),
)

for distances, fill_colour, edge_colour, label_template, median_value in histogram_specs:
    # Weight every observation by 1/N so bar heights are fractions of the
    # group's total and groups of different size share one y-scale.
    weights = np.ones_like(distances) / len(distances)
    plt.hist(distances, bins=30, alpha=0.5, weights=weights, density=False,
             histtype='stepfilled', color=fill_colour, edgecolor=edge_colour,
             label=label_template % median_value)

plt.ylabel('Frequency')
plt.xlabel("Tweets' distances in topic model space")
plt.legend(loc=1)
plt.savefig(result_folder + 'tweets_topic_distances_distribution_4th_no_timelimit_no_soccer_paper_revision.pdf', format='pdf')
plt.close()